import scrapy
import re
from xiaozhu.items import XiaozhuItem

class SpiderSpider(scrapy.Spider):
    """Spider that extracts listing records from the sh.xiaozhu.com page.

    A single request is issued for ``START_URL``; the response HTML is
    scanned with a regular expression and one ``XiaozhuItem`` is yielded
    per matched listing.
    """

    name = 'spider'
    allowed_domains = ['xiaozhu.com']

    # Entry page requested by ``start_requests``.
    START_URL = 'https://sh.xiaozhu.com/'

    # One match per listing. Capture groups, in order:
    #   1. value of the ``data-growing-title`` attribute
    #   2. value of the ``lazy_src`` attribute
    #   3. value of the ``alt`` attribute
    #   4. text of the ``<i>`` inside ``<span class="result_price">``
    #   5. text between ``<em class="hiddenTxt">`` and the comment span
    #   6. text inside ``<span class="commenthref">``
    # ``re.S`` lets ``.`` cross line breaks, since a listing's markup
    # spans several lines.
    _LISTING_RE = re.compile(
        r'<img class="lodgeunitpic".*?data-growing-title="(.*?)".*?lazy_src="(.*?)" '
        r'alt="(.*?)".*?<span class="result_price">.*?<i>(.*?)</i>.*?'
        r'<em class="hiddenTxt">(.*?)<span class="commenthref">(.*?)</span>',
        re.S,
    )

    # Matches a single whitespace character; used to strip all whitespace
    # from the free-text fields.
    _WHITESPACE_RE = re.compile(r'\s')

    def start_requests(self):
        """Yield the single initial request for ``START_URL``.

        ``dont_filter=True`` makes the request bypass Scrapy's duplicate
        request filter.
        """
        yield scrapy.Request(
            self.START_URL, callback=self.parse, dont_filter=True
        )

    def parse(self, response):
        """Yield one ``XiaozhuItem`` for every listing found in *response*.

        Args:
            response: The downloaded page; only ``response.text`` is read.

        Yields:
            XiaozhuItem: populated with ``name_id``, ``image_url``,
            ``name``, ``price``, ``category`` and ``comment``. The last two
            have every whitespace character removed.
        """
        listings = self._LISTING_RE.findall(response.text)
        for name_id, image_url, title, price, category, comment in listings:
            # Build a fresh item per listing: re-yielding one shared item
            # would let later iterations overwrite data that a consumer
            # (e.g. an asynchronous pipeline) is still holding.
            item = XiaozhuItem()
            item['name_id'] = name_id
            item['image_url'] = image_url
            item['name'] = title
            item['price'] = price
            item['category'] = self._WHITESPACE_RE.sub('', category)
            item['comment'] = self._WHITESPACE_RE.sub('', comment)
            yield item
